{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Using TensorFlow backend.\n"
     ]
    }
   ],
   "source": [
    "from keras import applications\n",
    "from keras.models import Sequential\n",
    "from keras.models import Model\n",
    "from keras.layers import Dropout, Flatten, Dense, Activation, Reshape, LeakyReLU\n",
    "from keras.callbacks import CSVLogger\n",
    "import tensorflow as tf\n",
    "from scipy.ndimage import imread\n",
    "import numpy as np\n",
    "import random\n",
    "from keras.layers import LSTM, CuDNNLSTM\n",
    "from keras.layers import Conv1D, MaxPooling1D\n",
    "from keras import backend as K\n",
    "import keras\n",
    "from keras.callbacks import CSVLogger, ModelCheckpoint\n",
    "from keras.backend.tensorflow_backend import set_session\n",
    "from keras import optimizers\n",
    "import h5py\n",
    "from sklearn.preprocessing import MinMaxScaler\n",
    "import os\n",
    "import pandas as pd\n",
    "# import matplotlib\n",
    "import h5py\n",
    "from keras import regularizers\n",
    "from sklearn.metrics import mean_squared_error\n",
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "with h5py.File(''.join(['bitcoin2015to2017_close.h5']), 'r') as hf:\n",
    "    datas = hf['inputs'].value\n",
    "    labels = hf['outputs'].value\n",
    "    input_times = hf['input_times'].value\n",
    "    output_times = hf['output_times'].value\n",
    "    original_inputs = hf['original_inputs'].value\n",
    "    original_outputs = hf['original_outputs'].value\n",
    "    original_datas = hf['original_datas'].value"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "scaler=MinMaxScaler()\n",
    "#split training validation\n",
    "training_size = int(0.8* datas.shape[0])\n",
    "training_datas = datas[:training_size,:,:]\n",
    "training_labels = labels[:training_size,:,0]\n",
    "validation_datas = datas[training_size:,:,:]\n",
    "validation_labels = labels[training_size:,:,0]\n",
    "validation_original_outputs = original_outputs[training_size:,:,:]\n",
    "validation_original_inputs = original_inputs[training_size:,:,:]\n",
    "validation_input_times = input_times[training_size:,:,:]\n",
    "validation_output_times = output_times[training_size:,:,:]\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(221, 256, 1)"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "validation_original_inputs.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'\n",
    "os.environ['CUDA_VISIBLE_DEVICES'] = '0'\n",
    "os.environ['TF_CPP_MIN_LOG_LEVEL']='2'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "config = tf.ConfigProto()\n",
    "config.gpu_options.allow_growth = True\n",
    "set_session(tf.Session(config=config))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(221, 272, 1)\n",
      "(221, 272, 1)\n"
     ]
    }
   ],
   "source": [
    "ground_true = np.append(validation_original_inputs,validation_original_outputs, axis=1)\n",
    "ground_true_times = np.append(validation_input_times,validation_output_times, axis=1)\n",
    "print ground_true_times.shape\n",
    "print ground_true.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "step_size = datas.shape[1]\n",
    "batch_size= 8\n",
    "nb_features = datas.shape[2]\n",
    "epochs = 30\n",
    "\n",
    "output_size=16\n",
    "units= 50"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(884, 256, 1)"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "training_datas.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "def fit_lstm(reg):\n",
    "    global training_datas, training_labels, batch_size, epochs,step_size,nb_features, units\n",
    "    model = Sequential()\n",
    "    model.add(CuDNNLSTM(units=units, bias_regularizer=reg, input_shape=(step_size,nb_features),return_sequences=False))\n",
    "    model.add(Activation('tanh'))\n",
    "    model.add(Dropout(0.2))\n",
    "    model.add(Dense(output_size))\n",
    "    model.add(LeakyReLU())\n",
    "    model.compile(loss='mse', optimizer='adam')\n",
    "    model.fit(training_datas, training_labels, batch_size=batch_size, epochs = epochs, verbose=0)\n",
    "    return model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def experiment(validation_datas,validation_labels,original_datas,ground_true,ground_true_times,validation_original_outputs, validation_output_times, nb_repeat, reg):\n",
    "    error_scores = list()\n",
    "    #get only the close data\n",
    "    ground_true = ground_true[:,:,0].reshape(-1)\n",
    "    ground_true_times = ground_true_times.reshape(-1)\n",
    "    ground_true_times = pd.to_datetime(ground_true_times, unit='s')\n",
    "    validation_output_times = pd.to_datetime(validation_output_times.reshape(-1), unit='s')\n",
    "    for i in range(nb_repeat):\n",
    "        model = fit_lstm(reg)\n",
    "        predicted = model.predict(validation_datas)\n",
    "        predicted_inverted = []\n",
    "        scaler.fit(original_datas[:,0].reshape(-1,1))\n",
    "        predicted_inverted.append(scaler.inverse_transform(predicted))\n",
    "        # since we are appending in the first dimension\n",
    "        predicted_inverted = np.array(predicted_inverted)[0,:,:].reshape(-1)\n",
    "        \n",
    "        error_scores.append(mean_squared_error(validation_original_outputs[:,:,0].reshape(-1),predicted_inverted))\n",
    "    return error_scores"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Training l1 0.0000,l2 0.0000\n"
     ]
    },
    {
     "ename": "NotImplementedError",
     "evalue": "reshaping is not supported for Index objects",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mNotImplementedError\u001b[0m                       Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-12-c06db008394b>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m      7\u001b[0m     \u001b[0mname\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0;34m'l1 %.4f,l2 %.4f'\u001b[0m \u001b[0;34m%\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mreg\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0ml1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mreg\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0ml2\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      8\u001b[0m     \u001b[0;32mprint\u001b[0m \u001b[0;34m\"Training \"\u001b[0m\u001b[0;34m+\u001b[0m \u001b[0mstr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 9\u001b[0;31m     \u001b[0mresults\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mexperiment\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvalidation_datas\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mvalidation_labels\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0moriginal_datas\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mground_true\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mground_true_times\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mvalidation_original_outputs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalidation_output_times\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnb_repeat\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mreg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     10\u001b[0m \u001b[0;31m# print(results.describe())\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     11\u001b[0m \u001b[0;31m# save boxplot\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m<ipython-input-11-bae5fdf15133>\u001b[0m in \u001b[0;36mexperiment\u001b[0;34m(validation_datas, validation_labels, original_datas, ground_true, ground_true_times, validation_original_outputs, validation_output_times, nb_repeat, reg)\u001b[0m\n\u001b[1;32m     13\u001b[0m         \u001b[0;31m# since we are appending in the first dimension\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     14\u001b[0m         \u001b[0mpredicted_inverted\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0marray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpredicted_inverted\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreshape\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 15\u001b[0;31m         \u001b[0mvalidation_output_times\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto_datetime\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvalidation_output_times\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreshape\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0munit\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m's'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     16\u001b[0m         \u001b[0merror_scores\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmean_squared_error\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvalidation_original_outputs\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreshape\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mpredicted_inverted\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     17\u001b[0m     \u001b[0;32mreturn\u001b[0m \u001b[0merror_scores\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/home/data/khuangaf/miniconda2/envs/py27/lib/python2.7/site-packages/pandas/core/indexes/base.pyc\u001b[0m in \u001b[0;36mreshape\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m   1147\u001b[0m         \u001b[0mReshape\u001b[0m \u001b[0man\u001b[0m \u001b[0mIndex\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1148\u001b[0m         \"\"\"\n\u001b[0;32m-> 1149\u001b[0;31m         raise NotImplementedError(\"reshaping is not supported \"\n\u001b[0m\u001b[1;32m   1150\u001b[0m                                   \"for Index objects\")\n\u001b[1;32m   1151\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mNotImplementedError\u001b[0m: reshaping is not supported for Index objects"
     ]
    }
   ],
   "source": [
    "# iterate over different reglurizer\n",
    "regs = [regularizers.l1(0),regularizers.l1(0.1), regularizers.l1(0.01), regularizers.l1(0.001), regularizers.l1(0.0001),regularizers.l2(0.1), regularizers.l2(0.01), regularizers.l2(0.001), regularizers.l2(0.0001)]\n",
    "nb_repeat = 30\n",
    "results = pd.DataFrame()\n",
    "for reg in regs:\n",
    "    \n",
    "    name = ('l1 %.4f,l2 %.4f' % (reg.l1, reg.l2))\n",
    "    print \"Training \"+ str(name)\n",
    "    results[name] = experiment(validation_datas,validation_labels,original_datas,ground_true,ground_true_times,validation_original_outputs, validation_output_times, nb_repeat,reg)\n",
    "# print(results.describe())\n",
    "# save boxplot\n",
    "results.describe().to_csv('result/lstm_bias_reg.csv')\n",
    "results.describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def fit_lstm(reg):\n",
    "    global training_datas, training_labels, batch_size, epochs,step_size,nb_features, units\n",
    "    model = Sequential()\n",
    "    model.add(CuDNNLSTM(units=units, kernel_regularizer=reg, input_shape=(step_size,nb_features),return_sequences=False))\n",
    "    model.add(Activation('tanh'))\n",
    "    model.add(Dropout(0.2))\n",
    "    model.add(Dense(output_size))\n",
    "    model.add(LeakyReLU())\n",
    "    model.compile(loss='mse', optimizer='adam')\n",
    "    model.fit(training_datas, training_labels, batch_size=batch_size, epochs = epochs, verbose=0)\n",
    "    return model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# iterate over different reglurizer\n",
    "regs = [regularizers.l1(0),regularizers.l1(0.1), regularizers.l1(0.01), regularizers.l1(0.001), regularizers.l1(0.0001),regularizers.l2(0.1), regularizers.l2(0.01), regularizers.l2(0.001), regularizers.l2(0.0001)]\n",
    "nb_repeat = 30\n",
    "results = pd.DataFrame()\n",
    "for reg in regs:\n",
    "    \n",
    "    name = ('l1 %.4f,l2 %.4f' % (reg.l1, reg.l2))\n",
    "    print \"Training \"+ str(name)\n",
    "    results[name] = experiment(validation_datas,validation_labels,original_datas,ground_true,ground_true_times,validation_original_outputs, validation_output_times, nb_repeat,reg)\n",
    "\n",
    "results.describe().to_csv('result/lstm_kernel_reg.csv')\n",
    "results.describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def fit_lstm(reg):\n",
    "    global training_datas, training_labels, batch_size, epochs,step_size,nb_features, units\n",
    "    model = Sequential()\n",
    "    model.add(CuDNNLSTM(units=units, activity_regularizer=reg, input_shape=(step_size,nb_features),return_sequences=False))\n",
    "    model.add(Activation('tanh'))\n",
    "    model.add(Dropout(0.2))\n",
    "    model.add(Dense(output_size))\n",
    "    model.add(LeakyReLU())\n",
    "    model.compile(loss='mse', optimizer='adam')\n",
    "    model.fit(training_datas, training_labels, batch_size=batch_size, epochs = epochs, verbose=0)\n",
    "    return model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# iterate over different reglurizer\n",
    "regs = [regularizers.l1(0),regularizers.l1(0.1), regularizers.l1(0.01), regularizers.l1(0.001), regularizers.l1(0.0001),regularizers.l2(0.1), regularizers.l2(0.01), regularizers.l2(0.001), regularizers.l2(0.0001)]\n",
    "nb_repeat = 30\n",
    "results = pd.DataFrame()\n",
    "for reg in regs:\n",
    "    \n",
    "    name = ('l1 %.4f,l2 %.4f' % (reg.l1, reg.l2))\n",
    "    print \"Training \"+ str(name)\n",
    "    results[name] = experiment(validation_datas,validation_labels,original_datas,ground_true,ground_true_times,validation_original_outputs, validation_output_times, nb_repeat,reg)\n",
    "\n",
    "results.describe().to_csv('result/lstm_activity_reg.csv')\n",
    "results.describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def fit_lstm(reg):\n",
    "    global training_datas, training_labels, batch_size, epochs,step_size,nb_features, units\n",
    "    model = Sequential()\n",
    "    model.add(CuDNNLSTM(units=units, recurrent_regularizer=reg, input_shape=(step_size,nb_features),return_sequences=False))\n",
    "    model.add(Activation('tanh'))\n",
    "    model.add(Dropout(0.2))\n",
    "    model.add(Dense(output_size))\n",
    "    model.add(LeakyReLU())\n",
    "    model.compile(loss='mse', optimizer='adam')\n",
    "    model.fit(training_datas, training_labels, batch_size=batch_size, epochs = epochs, verbose=0)\n",
    "    return model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# iterate over different reglurizer\n",
    "regs = [regularizers.l1(0),regularizers.l1(0.1), regularizers.l1(0.01), regularizers.l1(0.001), regularizers.l1(0.0001),regularizers.l2(0.1), regularizers.l2(0.01), regularizers.l2(0.001), regularizers.l2(0.0001)]\n",
    "nb_repeat = 30\n",
    "results = pd.DataFrame()\n",
    "for reg in regs:\n",
    "    \n",
    "    name = ('l1 %.4f,l2 %.4f' % (reg.l1, reg.l2))\n",
    "    print \"Training \"+ str(name)\n",
    "    results[name] = experiment(validation_datas,validation_labels,original_datas,ground_true,ground_true_times,validation_original_outputs, validation_output_times, nb_repeat,reg)\n",
    "\n",
    "results.describe().to_csv('result/lstm_recurrent_reg.csv')\n",
    "results.describe()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
